package com.xunshibao;

import java.io.IOException;
import java.util.ListIterator;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small command-line spider: downloads one news page over HTTP and prints the
 * article title and the article body HTML extracted with jsoup.
 */
public class QQSpider {

    /** Page fetched and parsed by {@link #main(String[])}. */
    private static final String NEWS_URL = "http://www.javaeye.com/news/20144";

    /** Browser-like User-Agent header sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13";

    /** Charset passed to {@code EntityUtils.toString} as the fallback for decoding the body. */
    private static final String DEFAULT_CHARSET = "gbk";

    /**
     * Fetches {@link #NEWS_URL} and hands the response body to
     * {@link #parseMain(String)}. Any failure is reported on standard error
     * via a stack trace; the HTTP client is always shut down afterwards.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        HttpClient client = new DefaultHttpClient();
        HttpGet get = new HttpGet(NEWS_URL);
        get.addHeader("User-Agent", USER_AGENT);
        try {
            HttpResponse resp = client.execute(get);
            HttpEntity entity = resp.getEntity();
            if (entity != null) {
                // Do not gate on getContentLength(): it is -1 whenever the length is
                // unknown (e.g. chunked transfer), yet the body is still readable.
                String content = EntityUtils.toString(entity, DEFAULT_CHARSET);
                parseMain(content);
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and exit normally.
            e.printStackTrace();
        } finally {
            // Release the connections held by the client's connection manager.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Parses a news page, prints the article title and the article body HTML
     * to standard output, and returns the body HTML.
     *
     * <p>The title is read from the {@code title} attribute of
     * {@code div.title > h3 > a} inside the first {@code .news_main} element;
     * the body is the inner HTML of {@code #news_content} inside that element.
     *
     * @param content the page HTML; may be {@code null}
     * @return the inner HTML of {@code #news_content}, or {@code null} when
     *         {@code content} is {@code null} or the expected elements are missing
     */
    public static String parseMain(String content) {
        if (content == null) {
            return null;
        }
        Document doc = Jsoup.parse(content);

        // first() yields null when nothing matches, so guard before dereferencing.
        Element newsMain = doc.select(".news_main").first();
        if (newsMain == null) {
            System.err.println("No element matching .news_main found in page");
            return null;
        }
        System.out.println(newsMain.select("div.title > h3 > a").attr("title"));

        Element newsContent = newsMain.select("#news_content").first();
        if (newsContent == null) {
            System.err.println("No element matching #news_content found in page");
            return null;
        }
        String html = newsContent.html();
        System.out.println(html);
        return html;
    }
}
